/* Population counts */

#delimit ;
* Session setup: start from an empty session, switch off output paging,
  and print the run date and time so the log is time-stamped.
  NOTE(review): the local outfile is defined here but is not referenced
  anywhere in the visible part of this file - confirm it is still needed;
clear all;
set more off;
local outfile "GWgap_postreadin_v4";


display _newline "$S_DATE $S_TIME";




********************************************************************************;
* population counts for people;


* Build temp_prody_inds: the distinct ind4 codes observed among person
  records flagged Q_inprod==1.
  The flag is compared with 1 explicitly because a bare -if Q_inprod-
  is also true when the flag is missing, which would let unflagged
  records add their industry code to the list.
  missing() is used for ind4 so that extended missing values are
  removed along with the plain system missing value.
  The clear option makes the step re-runnable when data are in memory;
use if Q_inprod==1 using GWgap_pr_IDI_v4, clear;
keep ind4;
drop if missing(ind4);
duplicates drop;
save temp_prody_inds, replace;



* Reload the full person-level file and flag the records whose ind4
  code is present in temp_prody_inds. The merge indicator equals 3 for
  matched records, so in_prod is 1 for a match and 0 otherwise;
use GWgap_pr_IDI_v4, clear;

merge m:1 ind4 using temp_prody_inds, keep(master match) generate(prody);
generate in_prod = (prody == 3);
drop prody;

* Attach firm-level variables by pent and year from the main firm file,
  then move each one to a trailing-underscore name so that the same
  variable names can be merged in again from a second firm file;
local firm_vars av_fte_m av_fte_f pf_ind av_fte_lt25 av_fte_25to39 av_fte_40to54 av_fte_55p L_hc_t;

rename hp_pent pent;
merge m:1 pent year using GWgap_pr_firm_v4,
	keep(master match) nogenerate keepusing(`firm_vars');

foreach v of local firm_vars {;
	rename `v' `v'_;
};

* Flag records with a missing age or a missing value in either FTE
  measure, then discard person-level variables that are not referenced
  again in the person counts;
generate msg_things = (age == .) | (max_fte_employee_av == .) | (indiv_avfte_hp == .);
drop max_gross_earn_yr max_fte_employee_av max_mon_wkd num_pents
	age female wkd_hpp_1ya wkd_hpp_2ya
	anz06_4d multiplant L_hc lnWpm_hp
	indiv_avfte_hp num_pents_cat age_cat lnIDI_earnhp;
* Second firm merge: bring in the same firm variables by pent and year
  from GWgap_pr_firm_hclt5_v4, then fall back to the trailing-underscore
  copies wherever the freshly merged value is missing.
  missing() works for numeric and string variables alike, and it also
  covers extended missing values. No -capture- is needed, so a genuine
  failure (for example a type clash between the two firm files) stops
  the run instead of silently leaving values unfilled;
merge m:1 pent year using GWgap_pr_firm_hclt5_v4, keep(master match) nogenerate
	keepusing(av_fte_m av_fte_f pf_ind av_fte_lt25 av_fte_25to39
		av_fte_40to54 av_fte_55p L_hc_t);

foreach var in av_fte_m av_fte_f
	pf_ind av_fte_lt25 av_fte_25to39 av_fte_40to54 av_fte_55p L_hc_t {;

		replace `var' = `var'_ if missing(`var');
};


*** grrnum confidentialises observation counts;
* Each count reports the sample size after the restriction just applied.
  The grrnum calls are left commented out;

* 1. full non-WP sample;
count;
*grrnum r(N), base(3) seed(7610);

* 2. keep people who are not missing IDI data and whose employer is
     identified (pent does not start with ZZ);
keep if msg_things!=1 & substr(pent,1,2)!="ZZ";

count;
*grrnum r(N), base(3) seed(7610);

* 3. keep people employed by firms in the prody sector;
keep if in_prod!=0;

count;
*grrnum r(N), base(3) seed(7610);

* dropping firms with bad prody data or other missing data.
  Every completeness check goes through missing(), so extended missing
  values in L_hc_t are rejected in the same way as for the FTE
  variables. A not-equal comparison with the system missing value would
  let an extended missing head count pass this filter;
keep if Q_gprod==1
	& !missing(L_hc_t, av_fte_m, av_fte_f, pf_ind,
		av_fte_lt25, av_fte_25to39, av_fte_40to54, av_fte_55p);

count;
*grrnum r(N), base(3) seed(7610);

* keep people in firms whose head count L_hc_t is not below 5;
keep if L_hc_t>=5;

count;
*grrnum r(N), base(3) seed(7610);

* remove firms outside the connected set (Q_ffe other than 1);
drop if Q_ffe!=1;

count;
*grrnum r(N), base(3) seed(7610);






********************************************************************************;
* population counts for firms;


#delimit ;
* Build temp_prody_indsb: the distinct non-empty pf_ind codes among
  firm records flagged Q_inprod==1;
use if Q_inprod==1 using GWgap_pr_firm_v4, clear;
keep pf_ind;
keep if pf_ind!="";
duplicates drop;
save temp_prody_indsb, replace;


* Stack the main firm file and the hclt5 firm file, flag the firms
  whose pf_ind code is present in temp_prody_indsb (merge indicator 3
  means matched), and attach anz06_4d by pent from temp_indy_v3.
  The last merge leaves its default _merge variable in the data;
use GWgap_pr_firm_v4, clear;
append using GWgap_pr_firm_hclt5_v4;

merge m:1 pf_ind using temp_prody_indsb, keep(master match) generate(prody);

generate in_prod = (prody == 3);
drop prody;
label variable in_prod "In a productive industry";

merge m:1 pent using temp_indy_v3, keep(master match) keepusing(anz06_4d);



* Each count reports the number of firm records left after the
  restriction just applied. The grrnum calls are left commented out;

* 1. full sample;
count;
*grrnum r(N), base(3) seed(7610);

* 2. keep firms whose head count L_hc_t is not zero;
keep if L_hc_t!=0;

count;
*grrnum r(N), base(3) seed(7610);

* 3. keep firms in the prody sector;
keep if in_prod!=0;

count;
*grrnum r(N), base(3) seed(7610);

* dropping firms with bad prody data or other missing data.
  Every completeness check goes through missing(), so extended missing
  values in L_hc_t are rejected in the same way as for the FTE
  variables. A not-equal comparison with the system missing value would
  let an extended missing head count pass this filter;
keep if Q_gprod==1
	& !missing(L_hc_t, av_fte_m, av_fte_f, pf_ind,
		av_fte_lt25, av_fte_25to39, av_fte_40to54, av_fte_55p);

count;
*grrnum r(N), base(3) seed(7610);

* keep firms whose head count L_hc_t is not below 5;
keep if L_hc_t>=5;

count;
*grrnum r(N), base(3) seed(7610);

* remove firms outside the connected set (Q_ffe other than 1);
drop if Q_ffe!=1;

count;
*grrnum r(N), base(3) seed(7610);


